Loading In the Data
emdat_old <- read_csv('~/Desktop/projects/emdat_proj/full_emdat_geocoded_finally.csv')
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)Rows: 74035 Columns: 51── Column specification ────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (27): Dis No, Seq, Disaster Group, Disaster Subgroup, Disaster Type, Disaster Subtype, Disaster Sub...
dbl (18): Year, Dis Mag Value, Start Year, Start Month, Start Day, End Year, End Month, End Day, Total ...
lgl (5): Glide, Aid Contribution, Reconstruction Costs ('000 US$), Admin1 Code, uncertain_location_spe...
time (1): Local Time
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
emdat_new <- read_csv('~/Desktop/projects/emdat_proj/data/geocoded_21_23_emdat.csv')
Rows: 4987 Columns: 54── Column specification ────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (27): Dis No, Seq, Glide, Disaster Group, Disaster Subgroup, Disaster Type, Disaster Subtype, Disast...
dbl (22): Year, Dis Mag Value, Start Year, Start Month, Start Day, End Year, End Month, End Day, Total D...
lgl (5): AID Contribution ('000 US$), Latitude, Longitude, Local Time, uncertain_location_specificity
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ENSO data
enso_data <- read_csv('~/Desktop/projects/emdat_proj/data/enso_data_copy2.csv')
Rows: 812 Columns: 7── Column specification ────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (4): MonthTxt, PeriodTxt, PeriodNum, Event
dbl (3): Year, MonthNum, Value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
enso_data %>%
group_by(Event) %>%
count()
emdat_hydrological_enso %>%
filter(!is.na(enso_Value)) %>%
filter(!is.na(wet)) %>%
mutate(enso_positive = (enso_Value > 0)) %>%
group_by(`Dis No`) %>%
summarize(enso_pos = any(enso_positive),enso_val = mean(enso_Value), is_wet = any(wet)) %>%
ggplot(mapping = aes(x = enso_val, fill = is_wet)) +
geom_histogram(stat = "density") +
facet_grid(~is_wet) +
ggtitle("Counts of Disasters compared to value of ONI with wet disasters in Blue and Dry in Red")
Warning: Ignoring unknown parameters: `binwidth`, `bins`, and `pad`







resy <- 2.5
emdat_hydrological_enso %>%
left_join(event_num_months, by = c("Event")) %>%
filter(!is.na(enso_Value)) %>%
filter(!is.na(wet)) %>%
filter(!is.na(lat)) %>%
mutate(wet_factor = as_factor(wet)) %>%
mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
mutate(dis_no_factor = as_factor(`Dis No`)) %>%
group_by(lat_box, lng_box, Event_factor, wet_string, dis_no_factor) %>%
summarize(num_months = first(num_months), num_disaster_month = mean(num_months_disaster)) %>%
group_by(lat_box, lng_box, Event_factor, wet_string) %>%
summarize(num_months = first(num_months), num_disaster_months = sum(num_disaster_month)) %>%
mutate(disaster_rate = num_disaster_months / num_months) %>%
pivot_wider(id_cols = c(lat_box, lng_box), names_from = c(Event_factor, wet_string), values_from = c(disaster_rate), values_fill = 0) %>%
mutate(positive_wet_relative_risk = positive_wet_disasters / no_event_wet_disasters,
negative_wet_relative_risk = negative_wet_disasters / no_event_wet_disasters,
positive_dry_relative_risk = positive_dry_disasters / no_event_dry_disasters,
negative_dry_relative_risk = negative_dry_disasters / no_event_dry_disasters) %>%
select(lat_box, lng_box, positive_wet_relative_risk, negative_wet_relative_risk, positive_dry_relative_risk, negative_dry_relative_risk) %>%
pivot_longer(cols = c(positive_wet_relative_risk, negative_wet_relative_risk, positive_dry_relative_risk, negative_dry_relative_risk), names_to = 'risk', values_to = 'value') %>%
mutate(risk_factor = as_factor(risk), values_cleaned = if_else(is.infinite(value), 18, value)) %>%
mutate(relative_risk_cleaned = if_else(is.nan(values_cleaned), 1, values_cleaned)) %>%
mutate(relative_risk_cleaned2 = if_else((relative_risk_cleaned) > 2, 2, relative_risk_cleaned)) %>%
ggplot() +
geom_sf(data = world_map) +
geom_tile(mapping = aes(x = lng_box, y = lat_box, fill = relative_risk_cleaned2, width = resy)) +
scale_fill_gradient2(midpoint = 1, limits = c(0, 2), low = 'cyan', high = 'red') +
facet_grid(rows = vars(risk_factor))+
ggtitle("Relative risk plot using calculations of the form [{num_disasters_positive_wet / num_months_positive} / {num_disasters_no_event_wet / num_months_no_event}]")
`summarise()` has grouped output by 'lat_box', 'lng_box', 'Event_factor', 'wet_string'. You can override using the `.groups` argument.`summarise()` has grouped output by 'lat_box', 'lng_box', 'Event_factor'. You can override using the `.groups` argument.

resy <- 2.5
emdat_hydrological_enso %>%
left_join(event_num_months, by = c("Event")) %>%
filter(!is.na(enso_Value)) %>%
filter(!is.na(wet)) %>%
filter(!is.na(lat)) %>%
mutate(wet_factor = as_factor(wet)) %>%
mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
mutate(dis_no_factor = as_factor(`Dis No`)) %>%
group_by(lat_box, lng_box, Event_factor, wet_string, dis_no_factor) %>%
summarize(num_months = first(num_months), num_disaster_month = mean(num_months_disaster)) %>%
group_by(lat_box, lng_box, Event_factor, wet_string) %>%
summarize(num_months = first(num_months), num_disaster_months = sum(num_disaster_month)) %>%
mutate(disaster_rate = num_disaster_months / num_months) %>%
pivot_wider(id_cols = c(lat_box, lng_box), names_from = c(Event_factor, wet_string), values_from = c(disaster_rate), values_fill = 0) %>%
mutate(positive_wet_percent = positive_wet_disasters / (no_event_wet_disasters + positive_wet_disasters + negative_wet_disasters) ,
negative_wet_percent = negative_wet_disasters / (no_event_wet_disasters + positive_wet_disasters + negative_wet_disasters),
positive_dry_percent = positive_dry_disasters / (no_event_dry_disasters + positive_dry_disasters + negative_dry_disasters),
negative_dry_percent = negative_dry_disasters / (no_event_dry_disasters + positive_dry_disasters + negative_dry_disasters)
) %>%
select(lat_box, lng_box, positive_wet_percent, negative_wet_percent, positive_dry_percent, negative_dry_percent) %>%
pivot_longer(cols = c(positive_wet_percent, negative_wet_percent, positive_dry_percent, negative_dry_percent), names_to = 'typey', values_to = 'value') %>%
mutate(type_factor = as_factor(typey), values = if_else(is.nan(value), 0, value)) %>%
ggplot() +
geom_sf(data = world_map) +
geom_tile(mapping = aes(x = lng_box, y = lat_box, fill = values, width = resy)) +
scale_fill_gradient(low = 'white', high = 'red') +
facet_grid(rows = vars(type_factor))+
ggtitle("Disaster Percent plot using calculations of the form [positive_wet / all_wet]")
`summarise()` has grouped output by 'lat_box', 'lng_box', 'Event_factor', 'wet_string'. You can override using the `.groups` argument.`summarise()` has grouped output by 'lat_box', 'lng_box', 'Event_factor'. You can override using the `.groups` argument.

# SPLIT YEAR INTO 2
# Apr to Sep
# Oct to Mar
## calculate relative risk within each box
### create boxes acc to resy X resy
resy <- 2.5
event_half_num_months <- enso_data %>%
mutate(spring_summery = as_factor((MonthNum >= 4) & (MonthNum <= 9))) %>%
mutate(half_year = fct_recode(spring_summery, spring_summer = "TRUE", fall_winter = "FALSE")) %>%
group_by(Event, half_year) %>%
summarize(num_months = n())
`summarise()` has grouped output by 'Event'. You can override using the `.groups` argument.
emdat_hydrological_enso %>%
mutate(spring_summery = as_factor((end_month >= 4) & (end_month <= 9))) %>%
mutate(half_year = fct_recode(spring_summery, spring_summer = "TRUE", fall_winter = "FALSE")) %>%
left_join(event_half_num_months, by = c("Event", "half_year")) %>%
filter(!is.na(enso_Value)) %>%
filter(!is.na(wet)) %>%
filter(!is.na(lat)) %>%
mutate(wet_factor = as_factor(wet)) %>%
mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
mutate(dis_no_factor = as_factor(`Dis No`)) %>%
group_by(lat_box, lng_box, Event_factor, wet_string, dis_no_factor, half_year) %>%
summarize(num_months = first(num_months), num_disaster_month = mean(num_months_disaster)) %>%
group_by(lat_box, lng_box, Event_factor, wet_string, half_year) %>%
summarize(num_months = first(num_months), num_disaster_months = sum(num_disaster_month)) %>%
mutate(disaster_rate = num_disaster_months / num_months) %>%
pivot_wider(id_cols = c(lat_box, lng_box, half_year), names_from = c(Event_factor, wet_string), values_from = c(disaster_rate), values_fill = 0) %>%
mutate(positive_wet_relative_risk = positive_wet_disasters / no_event_wet_disasters,
negative_wet_relative_risk = negative_wet_disasters / no_event_wet_disasters,
positive_dry_relative_risk = positive_dry_disasters / no_event_dry_disasters,
negative_dry_relative_risk = negative_dry_disasters / no_event_dry_disasters) %>%
select(lat_box, lng_box, half_year, positive_wet_relative_risk, negative_wet_relative_risk, positive_dry_relative_risk, negative_dry_relative_risk) %>%
pivot_longer(cols = c(positive_wet_relative_risk, negative_wet_relative_risk, positive_dry_relative_risk, negative_dry_relative_risk), names_to = 'risk', values_to = 'value') %>%
mutate(risk_factor = as_factor(risk), values_cleaned = if_else(is.infinite(value), 18, value)) %>%
mutate(relative_risk_cleaned = if_else(is.nan(values_cleaned), 1, values_cleaned)) %>%
mutate(relative_risk_cleaned_truncatedat2 = if_else((relative_risk_cleaned) > 2, 2, relative_risk_cleaned)) %>%
select(lat_box, lng_box, half_year, risk_factor, relative_risk_cleaned, relative_risk_cleaned_truncatedat2) %>%
ggplot() +
geom_sf(data = world_map) +
geom_tile(mapping = aes(x = lng_box, y = lat_box, fill = relative_risk_cleaned_truncatedat2, width = resy)) +
scale_fill_gradient2(midpoint = 1, limits = c(0, 2), low = 'cyan', high = 'red') +
facet_grid(rows = vars(risk_factor), cols = vars(half_year))+
ggtitle("Half year Relative risk plot using calculations of the form [{num_disasters_positive_wet / num_months_positive} / {num_disasters_no_event_wet / num_months_no_event}]")
`summarise()` has grouped output by 'lat_box', 'lng_box', 'Event_factor', 'wet_string', 'dis_no_factor'. You can override using the `.groups` argument.`summarise()` has grouped output by 'lat_box', 'lng_box', 'Event_factor', 'wet_string'. You can override using the `.groups` argument.

NA
Dry Disasters
resy <- 2.5
emdat_hydrological_enso %>%
filter(dry) %>%
mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
group_by(`Dis No`, lat_box, lng_box) %>%
summarise(disaster_type = first(`Disaster Type`)) %>%
group_by(disaster_type, lat_box, lng_box) %>%
summarise(county = n()) %>%
filter(county > 0) %>%
ggplot() +
geom_sf(data = world_map) +
geom_tile(mapping = aes(x = lng_box, y = lat_box, fill = county, width = resy)) +
scale_fill_continuous(type="gradient", low = "white", high = "red") +
facet_grid(rows = vars(disaster_type))+
ggtitle("Dry disaster types around the world")
`summarise()` has grouped output by 'Dis No', 'lat_box'. You can override using the `.groups` argument.`summarise()` has grouped output by 'disaster_type', 'lat_box'. You can override using the `.groups` argument.

emdat_hydrological_enso %>%
filter(dry) %>%
filter(!is.na(Event)) %>%
mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
group_by(`Dis No`, Event) %>%
summarise(disaster_type = first(`Disaster Type`)) %>%
ggplot() +
geom_bar(mapping = aes(x = disaster_type, stat = "count")) +
facet_grid(~Event)
`summarise()` has grouped output by 'Dis No'. You can override using the `.groups` argument.Warning: Ignoring unknown aesthetics: stat

emdat_hydrological_enso %>%
filter(dry) %>%
filter(!is.na(Event)) %>%
mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
group_by(`Dis No`, Event) %>%
summarise(disaster_type = first(`Disaster Type`)) %>%
ggplot() +
geom_bar(mapping = aes(x = Event, stat = "count")) +
facet_grid(~disaster_type)
`summarise()` has grouped output by 'Dis No'. You can override using the `.groups` argument.Warning: Ignoring unknown aesthetics: stat

Looking at ENSO Value


Create Data as NetCDF
resy <- 2.5
df_to_be_spatialized <- emdat_hydrological_enso %>%
mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
mutate(month_number = (end_year * 12) + end_month) %>%
group_by(lat_box, lng_box, month_number, `Dis No`, `Disaster Type`) %>%
summarize(enso_value = mean(enso_Value), Event = first(Event), num_months_disaster = mean(num_months_disaster)) %>%
group_by(lat_box, lng_box, month_number, `Disaster Type`) %>%
summarize(enso_value = mean(enso_value), Event = first(Event), num_disaster_months = sum(num_months_disaster)) %>%
filter(!is.na(lat_box))
`summarise()` has grouped output by 'lat_box', 'lng_box', 'month_number', 'Dis No'. You can override using the `.groups` argument.`summarise()` has grouped output by 'lat_box', 'lng_box', 'month_number'. You can override using the `.groups` argument.
---
title: "ENSO analysis new"
output: html_notebook
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Packages

```{r}
library(tidyverse)
library(sf)
library(here)
library(weed)
library(rnaturalearth)
library(rnaturalearthdata)
```

## Loading In the Data

```{r}
emdat_old <- read_csv('~/Desktop/projects/emdat_proj/full_emdat_geocoded_finally.csv')
emdat_new <- read_csv('~/Desktop/projects/emdat_proj/data/geocoded_21_23_emdat.csv')

emdat <- bind_rows(emdat_old, emdat_new)
```
# How many disasters were located?

```{r}
emdat %>%
  percent_located_disasters(plot_result=FALSE)
```

# How many locations were located?

```{r}
emdat %>%
  percent_located_locations(plot_result=FALSE)
```
# All types of disasters

```{r}
emdat %>%
  group_by(`Disaster Type`) %>%
  count()
```

# Restrict to dry and wet hydrological disasters

```{r}
dry_list <- c("Drought", "Wildfire")
wet_list <- c("Flood", "Landslide", "Storm")

emdat_hydrological <- emdat %>%
  filter(`Disaster Type` %in% c(dry_list, wet_list)) %>%
  mutate(dry = (`Disaster Type` %in% dry_list), wet = (`Disaster Type` %in% wet_list))
```

```{r}
emdat_hydrological %>%
  group_by(`Disaster Type`) %>%
  count()
```

# ENSO data

```{r}
enso_data <- read_csv('~/Desktop/projects/emdat_proj/data/enso_data_copy2.csv')
enso_data %>%
  group_by(Event) %>%
  count()
```

```{r}

emdat_hydrological2 <- emdat_hydrological %>%
  filter(!is.na(`Start Month`)) %>%
  mutate(start_year = if_else(is.na(`Start Year`), Year, `Start Year`)) %>%
  mutate(end_year = if_else(is.na(`End Year`), start_year, `End Year`)) %>%
  mutate(end_month = if_else(is.na(`End Month`), `Start Month`, `End Month`)) %>%
  mutate(num_months_disaster = (end_month - `Start Month` + 1) + 12 * (end_year - start_year)) %>%
  filter(num_months_disaster > 0)

emdat_hydrological_enso <- emdat_hydrological %>%
  full_join(enso_data, by = c("Year" = "Year", "Start Month" = "MonthNum")) %>%
  rename("enso_Value" = "Value") %>%
  filter(!is.na(`Start Month`)) %>%
  mutate(start_year = if_else(is.na(`Start Year`), Year, `Start Year`)) %>%
  mutate(end_year = if_else(is.na(`End Year`), start_year, `End Year`)) %>%
  mutate(end_month = if_else(is.na(`End Month`), `Start Month`, `End Month`)) %>%
  mutate(num_months_disaster = (end_month - `Start Month` + 1) + 12 * (end_year - start_year)) %>%
  filter(num_months_disaster > 0)

emdat_hydrological_enso %>%
  group_by(num_months_disaster) %>%
  summarize(count_disasters = n_distinct(`Dis No`))
```

```{r}
emdat_hydrological_enso %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  mutate(enso_positive = (enso_Value > 0)) %>%
  group_by(`Dis No`) %>%
  summarize(enso_pos = any(enso_positive),enso_val = mean(enso_Value), is_wet = any(wet)) %>%
  ggplot(mapping = aes(x = enso_val, fill = is_wet)) +
    geom_histogram(stat = "density") +
    facet_grid(~is_wet) +
  ggtitle("Counts of Disasters compared to value of ONI with wet disasters in Blue and Dry in Red")
```
```{r}
emdat_hydrological_enso %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  #mutate(enso_positive = (enso_Value > 0)) %>%
  group_by(`Dis No`) %>%
  summarize(enso_event = first(Event),enso_val = mean(enso_Value), is_wet = any(wet), is_dry = any(dry)) %>%
  mutate(is_wet_num = as.numeric(is_wet), is_dry_num = as.numeric(is_dry)) %>%
  group_by(enso_event) %>%
  summarize(num_disasters_wet = sum(is_wet_num), num_disasters_dry = sum(is_dry_num))
```
```{r}
world_map <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf")

emdat_hydrological_enso %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(enso_positive = (enso_Value > 0)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  select(lng, lat, enso_Value, wet_string) %>%
  #head(n = 50L) %>%
  ggplot() +
  geom_sf(data = world_map) +
  geom_tile(mapping = aes(x = lng, y = lat, fill = enso_Value, width = 2, height = 2)) +
  scale_fill_gradient2(low="blue", mid = "white", high = "red") +
  facet_grid(~wet_string)
  

```
```{r}

emdat_hydrological_enso %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  select(lng, lat, Event_factor, wet_string, wet) %>%
  ggplot() +
  geom_sf(data = world_map) +
  geom_tile(mapping = aes(x = lng, y = lat, width = 2, height = 2, fill = 1, alpha = 0.001)) +
  facet_grid(cols = vars(wet_string), rows = vars(Event_factor))

```
```{r}

event_num_months <- enso_data %>%
  group_by(Event) %>%
  summarize(num_months = n())

emdat_hydrological_enso %>%
  left_join(event_num_months, by = c("Event")) %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  select(lng, lat, Event_factor, wet_string, num_months, num_months_disaster) %>%
  add_row(lng = 1, lat = 1, Event_factor = 'no_event', wet_string = 'dry_disasters', num_months = 1) %>%
  ggplot() +
  geom_sf(data = world_map) +
  geom_tile(mapping = aes(x = lng, y = lat, width = 2, height = 2, alpha = num_months_disaster/num_months, fill = 'red')) +
  facet_grid(cols = vars(wet_string), rows = vars(Event_factor)) +
  ggtitle("Same plot as above but scaled by num months")

```

```{r}
emdat_hydrological_enso %>%
  left_join(event_num_months, by = c("Event")) %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  select(lng, lat, Event_factor, wet_string, num_months) %>%
  ggplot() +
  geom_sf(data = world_map) +
  stat_bin_2d(mapping = aes(x = lng, y = lat), binwidth = c(2,2)) +
  scale_fill_gradient(trans = 'log', low = 'white', high = 'red') + 
  facet_grid(cols = vars(wet_string), rows = vars(Event_factor)) +
  ggtitle("2D Histogram Counts ")

```
```{r}

emdat_hydrological_enso %>%
  left_join(event_num_months, by = c("Event")) %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  select(lng, lat, Event_factor, wet_string, num_months) %>%
  ggplot() +
  geom_sf(data = world_map) +
  stat_bin_2d(mapping = aes(x = lng, y = lat, fill = ..density..), binwidth = c(2,2)) +
  scale_fill_gradient(trans = 'log', low = 'white', high = 'red') + 
  facet_grid(cols = vars(wet_string), rows = vars(Event_factor)) +
  ggtitle("2D Histogram Density")

```

```{r}

emdat_hydrological_enso %>%
  left_join(event_num_months, by = c("Event")) %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  select(lng, lat, Event_factor, wet_string, num_months, num_months_disaster) %>%
  ggplot() +
  geom_sf(data = world_map) +
  stat_bin_2d(mapping = aes(x = lng, y = lat, weight = num_months_disaster/num_months), binwidth = c(2,2)) +
  scale_fill_gradient(trans = 'log', low = 'white', high = 'red') + 
  facet_grid(cols = vars(wet_string), rows = vars(Event_factor)) +
  ggtitle("2D Histogram Inverse Weighted by Num Months")

```
```{r}
## calculate relative risk within each box
### create boxes acc to resy X resy

resy <- 2.5

emdat_hydrological_enso %>%
  left_join(event_num_months, by = c("Event")) %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  mutate(dis_no_factor = as_factor(`Dis No`)) %>%
  group_by(lat_box, lng_box, Event_factor, wet_string, dis_no_factor) %>%
  summarize(num_months = first(num_months), num_disaster_month = mean(num_months_disaster)) %>%
  group_by(lat_box, lng_box, Event_factor, wet_string) %>%
  summarize(num_months = first(num_months), num_disaster_months = sum(num_disaster_month)) %>%
  mutate(disaster_rate = num_disaster_months / num_months) %>%
  pivot_wider(id_cols = c(lat_box, lng_box), names_from = c(Event_factor, wet_string), values_from = c(disaster_rate), values_fill = 0) %>%
  mutate(positive_wet_relative_risk = positive_wet_disasters / no_event_wet_disasters, 
         negative_wet_relative_risk = negative_wet_disasters / no_event_wet_disasters, 
         positive_dry_relative_risk =  positive_dry_disasters / no_event_dry_disasters,
         negative_dry_relative_risk = negative_dry_disasters / no_event_dry_disasters) %>%
  select(lat_box, lng_box, positive_wet_relative_risk, negative_wet_relative_risk, positive_dry_relative_risk, negative_dry_relative_risk) %>%
  pivot_longer(cols = c(positive_wet_relative_risk, negative_wet_relative_risk, positive_dry_relative_risk, negative_dry_relative_risk), names_to = 'risk', values_to = 'value') %>%
  mutate(risk_factor = as_factor(risk), values_cleaned = if_else(is.infinite(value), 18, value)) %>%
  mutate(relative_risk_cleaned = if_else(is.nan(values_cleaned), 1, values_cleaned)) %>%
  mutate(relative_risk_cleaned2 = if_else((relative_risk_cleaned) > 2, 2, relative_risk_cleaned)) %>%
  ggplot() +
  geom_sf(data = world_map) +
  geom_tile(mapping = aes(x = lng_box, y = lat_box, fill = relative_risk_cleaned2, width = resy)) +
  scale_fill_gradient2(midpoint = 1, limits = c(0, 2), low = 'cyan', high = 'red') +
  facet_grid(rows = vars(risk_factor))+
  ggtitle("Relative risk plot using calculations of the form [{num_disasters_positive_wet / num_months_positive} / {num_disasters_no_event_wet / num_months_no_event}]")
  
  

```

```{r}
## calculate percent of total disasters within each box
### create boxes 2x2

resy <- 2.5

emdat_hydrological_enso %>%
  left_join(event_num_months, by = c("Event")) %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  mutate(dis_no_factor = as_factor(`Dis No`)) %>%
  group_by(lat_box, lng_box, Event_factor, wet_string, dis_no_factor) %>%
  summarize(num_months = first(num_months), num_disaster_month = mean(num_months_disaster)) %>%
  group_by(lat_box, lng_box, Event_factor, wet_string) %>%
  summarize(num_months = first(num_months), num_disaster_months = sum(num_disaster_month)) %>%
  mutate(disaster_rate = num_disaster_months / num_months) %>%
  pivot_wider(id_cols = c(lat_box, lng_box), names_from = c(Event_factor, wet_string), values_from = c(disaster_rate), values_fill = 0) %>%
  mutate(positive_wet_percent = positive_wet_disasters / (no_event_wet_disasters + positive_wet_disasters + negative_wet_disasters) , 
         negative_wet_percent = negative_wet_disasters / (no_event_wet_disasters + positive_wet_disasters + negative_wet_disasters), 
         positive_dry_percent =  positive_dry_disasters / (no_event_dry_disasters + positive_dry_disasters + negative_dry_disasters),
         negative_dry_percent =  negative_dry_disasters / (no_event_dry_disasters + positive_dry_disasters + negative_dry_disasters)
         ) %>%
  select(lat_box, lng_box, positive_wet_percent, negative_wet_percent, positive_dry_percent, negative_dry_percent) %>%
  pivot_longer(cols = c(positive_wet_percent, negative_wet_percent, positive_dry_percent, negative_dry_percent), names_to = 'typey', values_to = 'value') %>%
  mutate(type_factor = as_factor(typey), values = if_else(is.nan(value), 0, value)) %>%
  ggplot() +
  geom_sf(data = world_map) +
  geom_tile(mapping = aes(x = lng_box, y = lat_box, fill = values, width = resy)) +
  scale_fill_gradient(low = 'white', high = 'red') +
  facet_grid(rows = vars(type_factor))+
  ggtitle("Disaster Percent plot using calculations of the form [positive_wet / all_wet]")
  

```


```{r}
# SPLIT YEAR INTO 2
# Apr to Sep
# Oct to Mar
## calculate relative risk within each box
### create boxes acc to resy X resy

resy <- 2.5

event_half_num_months <- enso_data %>%
  mutate(spring_summery = as_factor((MonthNum >= 4) & (MonthNum <= 9))) %>%
  mutate(half_year = fct_recode(spring_summery, spring_summer = "TRUE", fall_winter = "FALSE")) %>%
  group_by(Event, half_year) %>%
  summarize(num_months = n())

emdat_hydrological_enso %>%
  mutate(spring_summery = as_factor((end_month >= 4) & (end_month <= 9))) %>%
  mutate(half_year = fct_recode(spring_summery, spring_summer = "TRUE", fall_winter = "FALSE")) %>%
  left_join(event_half_num_months, by = c("Event", "half_year")) %>%
  filter(!is.na(enso_Value)) %>%
  filter(!is.na(wet)) %>%
  filter(!is.na(lat)) %>%
  mutate(wet_factor = as_factor(wet)) %>%
  mutate(Event_factor = fct_recode(as_factor(Event), no_event = "N", negative = "-", positive = "+")) %>%
  mutate(wet_string = fct_recode(wet_factor, wet_disasters = "TRUE", dry_disasters = "FALSE")) %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  mutate(dis_no_factor = as_factor(`Dis No`)) %>%
  group_by(lat_box, lng_box, Event_factor, wet_string, dis_no_factor, half_year) %>%
  summarize(num_months = first(num_months), num_disaster_month = mean(num_months_disaster)) %>%
  group_by(lat_box, lng_box, Event_factor, wet_string, half_year) %>%
  summarize(num_months = first(num_months), num_disaster_months = sum(num_disaster_month)) %>%
  mutate(disaster_rate = num_disaster_months / num_months) %>%
  pivot_wider(id_cols = c(lat_box, lng_box, half_year), names_from = c(Event_factor, wet_string), values_from = c(disaster_rate), values_fill = 0) %>%
  mutate(positive_wet_relative_risk = positive_wet_disasters / no_event_wet_disasters, 
         negative_wet_relative_risk = negative_wet_disasters / no_event_wet_disasters, 
         positive_dry_relative_risk =  positive_dry_disasters / no_event_dry_disasters,
         negative_dry_relative_risk = negative_dry_disasters / no_event_dry_disasters) %>%
  select(lat_box, lng_box, half_year, positive_wet_relative_risk, negative_wet_relative_risk, positive_dry_relative_risk, negative_dry_relative_risk) %>%
  pivot_longer(cols = c(positive_wet_relative_risk, negative_wet_relative_risk, positive_dry_relative_risk, negative_dry_relative_risk), names_to = 'risk', values_to = 'value') %>%
  mutate(risk_factor = as_factor(risk), values_cleaned = if_else(is.infinite(value), 18, value)) %>%
  mutate(relative_risk_cleaned = if_else(is.nan(values_cleaned), 1, values_cleaned)) %>%
  mutate(relative_risk_cleaned_truncatedat2 = if_else((relative_risk_cleaned) > 2, 2, relative_risk_cleaned)) %>%
  select(lat_box, lng_box, half_year, risk_factor, relative_risk_cleaned, relative_risk_cleaned_truncatedat2) %>%
  ggplot() +
  geom_sf(data = world_map) +
  geom_tile(mapping = aes(x = lng_box, y = lat_box, fill = relative_risk_cleaned_truncatedat2, width = resy)) +
  scale_fill_gradient2(midpoint = 1, limits = c(0, 2), low = 'cyan', high = 'red') +
  facet_grid(rows = vars(risk_factor), cols = vars(half_year))+
  ggtitle("Half year Relative risk plot using calculations of the form [{num_disasters_positive_wet / num_months_positive} / {num_disasters_no_event_wet / num_months_no_event}]")
  
  

```

## Dry Disasters

```{r}
resy <- 2.5

emdat_hydrological_enso %>%
  filter(dry) %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  group_by(`Dis No`, lat_box, lng_box) %>%
  summarise(disaster_type = first(`Disaster Type`)) %>%
  group_by(disaster_type, lat_box, lng_box) %>%
  summarise(county = n()) %>%
  filter(county > 0) %>%
  ggplot() +
  geom_sf(data = world_map) +
  geom_tile(mapping = aes(x = lng_box, y = lat_box, fill = county, width = resy)) +
  scale_fill_continuous(type="gradient", low = "white", high = "red") +
  facet_grid(rows = vars(disaster_type))+
  ggtitle("Dry disaster types around the world")
  
  
  
```

```{r}
emdat_hydrological_enso %>%
  filter(dry) %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  group_by(`Dis No`) %>%
  summarise(disaster_type = first(`Disaster Type`)) %>%
  group_by(disaster_type) %>%
  summarise(county = n())

```

```{r}
emdat_hydrological_enso %>%
  filter(dry) %>%
  filter(!is.na(Event)) %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  group_by(`Dis No`, Event) %>%
  summarise(disaster_type = first(`Disaster Type`)) %>%
  ggplot() +
  geom_bar(mapping = aes(x = disaster_type, stat = "count")) +
  facet_grid(~Event)

```

```{r}
emdat_hydrological_enso %>%
  filter(dry) %>%
  filter(!is.na(Event)) %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  group_by(`Dis No`, Event) %>%
  summarise(disaster_type = first(`Disaster Type`)) %>%
  ggplot() +
  geom_bar(mapping = aes(x = Event, stat = "count")) +
  facet_grid(~disaster_type)

```
## Looking at ENSO Value

```{r}
emdat_hydrological_enso %>%
  group_by(`Dis No`) %>%
  summarize(disaster_type = first(`Disaster Type`), lat = mean(lat), lng = mean(lng), enso_value = mean(enso_Value), Event = first(Event), dry = first(dry), wet = first(wet)) %>%
  ggplot() +
  geom_histogram(mapping = aes(x = enso_value), binwidth = 0.1) 

```

```{r}
emdat_hydrological_enso %>%
  filter(!is.na(`Disaster Type`)) %>%
  group_by(`Dis No`) %>%
  summarize(disaster_type = first(`Disaster Type`), lat = mean(lat), lng = mean(lng), enso_value = mean(enso_Value), Event = first(Event), dry = first(dry), wet = first(wet)) %>%
  ggplot() +
  geom_histogram(mapping = aes(x = enso_value), binwidth = 0.1) +
  facet_grid(~disaster_type)


```
## Create Data as NetCDF

```{r}
resy <- 2.5

df_to_be_spatialized <- emdat_hydrological %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  mutate(month_number = (end_year * 12) + end_month) %>%
  group_by(lat_box, lng_box, month_number, `Dis No`, `Disaster Type`) %>%
  summarize(num_months_disaster = mean(num_months_disaster)) %>%
  group_by(lat_box, lng_box, month_number, `Disaster Type`) %>%
  summarize(num_disaster_months = sum(num_months_disaster)) %>%
  filter(!is.na(lat_box))

```

```{r}

df_to_be_spatialized2 %>%
  write_csv("~/Desktop/projects/emdat_proj/data/df_to_be_spatialized2.csv")

## Conversion to be done in pandas in python

```
```{r}

df_to_be_spatialized %>%
  mutate(month_of_year = month_number %% 12) %>%
  mutate(year = (month_number - month_of_year) / 12) %>%
  arrange(month_number)


```
```{r}

emdat_hydrological_enso %>%
  arrange(-1 * num_months_disaster)

```

```{r}
enso_valuesy <- enso_data %>%
  mutate(month_number = (Year * 12) + MonthNum, enso_value = Value) %>%
  select(month_number, enso_value)

new_emdat_data <- emdat_hydrological2 %>%
  mutate(start_month_number = `Start Month` + (12 * start_year)) %>%
  mutate(end_month_number = end_month + (12 * end_year)) %>%
  rowwise() %>%
  mutate(monthies = list(tibble(month_number = start_month_number:end_month_number))) %>%
  filter(num_months_disaster > 2) %>%
  unnest(cols = monthies) %>%
  mutate(lat_box = (as.integer(lat) %/% resy) * resy , lng_box = (as.integer(lng) %/% resy) * resy) %>%
  group_by(lat_box, lng_box, month_number, `Dis No`, `Disaster Type`) %>%
  summarize(mean_months = mean(num_months_disaster)) %>%
  group_by(lat_box, lng_box, month_number, `Disaster Type`) %>%
  summarize(disaster = (sum(mean_months) > 0)) %>%
  filter(!is.na(lat_box)) 

df_to_be_spatialized2 <- new_emdat_data

new_enso_data_oni <- read_tsv("~/Desktop/oni_enso.tsv")

new_enso_data_oni %>%
  pivot_longer(cols = colnames(new_enso_data_oni)[-1],
               names_to = "Month",
               values_to = "enso_oni_value") %>%
  rowwise() %>%
  mutate(month_num = month_text_to_num(Month)) %>%
  mutate(month_number = month_num + (12 * Year)) %>%
  write_csv("~/Desktop/projects/emdat_proj/data/enso_data_updated_6_9_23.csv")
  

listy <- list()
listy["DJF"] = 1
listy["JFM"] = 2
listy["FMA"] = 3
listy["MAM"] = 4
listy["AMJ"] = 5
listy["MJJ"] = 6
listy["JJA"] = 7
listy["JAS"] = 8
listy["ASO"] = 9
listy["SON"] = 10
listy["OND"] = 11
listy["NDJ"] = 12

month_text_to_num <- function(x) listy[[x]]

new_emdat_data %>%
  write_csv("~/Desktop/projects/emdat_proj/data/emdat_df_to_be_spatialized_updated_6_9_23.csv")



  

```
